
*** Set paths
local dir "..."
local dropbox_data "..."
local dropbox_data_create "..."
local local_data "..."
local dropbox_fig "..."
local dropbox_tab "..."


************************************************************
*** LOAD DATA 
************************************************************

use "`local_data'/data_base_bonds_bills.dta", clear  // created in mainfile_datacreation.do

************************************************************
*** PREPARE DATA
************************************************************

if $parentdealer ==1{
	
	*Use bidderid on the parent level
	tab bidderid if bidderid==submitterid

	replace bidderid=18 if bidderid==18001
	replace bidderid=24 if bidderid==24001
	replace bidderid=46 if bidderid==46001	

	replace submitterid=18 if submitterid==18001
	replace submitterid=24 if submitterid==24001
	replace submitterid=46 if submitterid==46001
}


if $yas ==1{ // This is the standard specfication

	replace IIROC_YIELD = YAS_BOND_YLD if market_type=="secondary"
	replace bidyield_bl = bidYAS_bl 
	replace askyield_bl = askYAS_bl
	replace midyield_bl = midYAS_bl 

	replace bidyield_can = bidYAS_can
	replace askyield_can = askYAS_can
}

if $yas ==0{

	*** USE THE BOND-YIELD EQUIVALENT YIELD FOR BILLS 
	gen double IIROC_YIELD_bill= (IIROC_PRICE/`F')^(-182.5/days_to_maturity)* (200 - 200 *(IIROC_PRICE/`F')^(182.5/days_to_maturity)) *100 ///
	if type=="bill" & market_type=="secondary"

	gen double midyield_bl_bill= (midprice_bl/`F')^(-182.5/days_to_maturity)* (200 - 200 *(midprice_bl/`F')^(182.5/days_to_maturity)) *100 ///
	if type=="bill" & market_type=="secondary"

	gen double bidyield_can_bill= (bidprice_can/`F')^(-182.5/days_to_maturity)* (200 - 200 *(bidprice_can/`F')^(182.5/days_to_maturity)) *100 ///
	if type=="bill" & market_type=="secondary"

	gen double askyield_can_bill= (askprice_can/`F')^(-182.5/days_to_maturity)* (200 - 200 *(askprice_can/`F')^(182.5/days_to_maturity)) *100  ///
	if type=="bill" & market_type=="secondary"

	replace IIROC_YIELD = IIROC_YIELD_bill 		if type=="bill" & market_type=="secondary"
	replace midyield_bl = midyield_bl_bill 		if type=="bill" & market_type=="secondary"
	replace bidyield_can = bidyield_can_bill 	if type=="bill" & market_type=="secondary"
	replace askyield_can = askyield_can_bill 	if type=="bill" & market_type=="secondary"

	*YIELDS THAT ARE 0 for some strange reason
	replace IIROC_YIELD=. if IIROC_YIELD==0 & type!="bill"
	replace IIROC_YIELD=. if IIROC_YIELD==0 & (type=="bill" & IIROC_PRICE!=`F')

}




**************************************************
**** FILL IN MISSING QUOTES 
**************************************************


if $fill ==1{

	foreach var of varlist bidprice_bl askprice_bl midprice_bl bidyield_bl askyield_bl midyield_bl  bidprice_can askprice_can bidyield_can askyield_can   {
		bys date isin: egen a0 = mean(`var')
		replace `var'=a0 if `var'==.
		drop a0 			
	}

	*use bloomberg quotes when candeal is empty
	replace bidprice_can = bidprice_bl if bidprice_can==.
	replace askprice_can = askprice_bl if askprice_can==.
	
	replace bidyield_can = bidyield_bl if bidyield_can==.
	replace askyield_can = askyield_bl if askyield_can==.
}


************************************************************
*** SAMPEL RESTRICTION: TRADES WITH INVESTORS
************************************************************

keep if market_type  == "secondary"
keep if counter_type == "CLIENT" 
count

************************************************************
*** DROP CRAZY YIELDS
************************************************************

gen markup   		=  (IIROC_YIELD - midyield_bl)
gen markup2 		=   markup if IIROC_SIDE=="BUY" 
replace markup2 	= - markup if IIROC_SIDE=="SELL" 


*histogram before dropping crazy yields
twoway (hist markup2 if retailer==1 & IIROC_SIDE=="BUY", percent lcolor(gs12) fcolor(gs12) bin(50)) (hist markup2 if retailer==1 & IIROC_SIDE=="SELL", percent fcolor(none) lcolor(black)  bin(50)), xti("")  leg(order(1 2) row(1) label(1 "R buys") label(2 "R sells")  region(lc(white))) graphregion(color(white))
		
twoway (hist markup2 if retailer==0 & IIROC_SIDE=="BUY", percent lcolor(gs12) fcolor(gs12) bin(50)) (hist markup2 if retailer==0 & IIROC_SIDE=="SELL", percent fcolor(none) lcolor(black)  bin(50)), xti("")  leg(order(1 2) row(1) label(1 "investor buys") label(2 "investor sells")  region(lc(white))) graphregion(color(white))


if $smoothing ==1{

	gen markup2_org = markup2	
	
	*drop outliers 
	bys retailer : egen p1 = pctile(markup2), p($percL )
	bys retailer : egen p99 = pctile(markup2), p($percU )


	replace IIROC_YIELD =. if markup2<p1 & markup2!=.
	replace IIROC_YIELD =. if markup2>p99 & markup2!=.
	replace IIROC_PRICE =. if markup2<p1 & markup2!=.
	replace IIROC_PRICE =. if markup2>p99 & markup2!=.

	replace markup2=. if markup2<p1 & markup2!=.
	replace markup2=. if markup2>p99 & markup2!=.

	*histograms after droping crazy yields
	twoway (hist markup2 if retailer==1 & IIROC_SIDE=="BUY", percent lcolor(gs12) fcolor(gs12) bin(50))///
	(hist markup2 if retailer==1 & IIROC_SIDE=="SELL", percent fcolor(none) lcolor(black)  bin(50)), ///
	xti("")  leg(order(1 2) row(1) label(1 "R buys") label(2 "R sells")  region(lc(white))) graphregion(color(white))

	twoway (hist markup2 if retailer==0 & IIROC_SIDE=="BUY", percent lcolor(gs12) fcolor(gs12) bin(50)) ///
	(hist markup2 if retailer==0 & IIROC_SIDE=="SELL", percent fcolor(none) lcolor(black)  bin(50)), ///
	xti("")  leg(order(1 2) row(1) label(1 "investor buys") label(2 "investor sells")  region(lc(white))) graphregion(color(white))
}




************************************************************
*** CREATE USEFUL VARIABLES	
************************************************************

*lei id
egen lei_id = group(LEI)
	
*variables for the regression
gen institutional = (retailer==0)

*time grid 
gen hour2 = hh(exec_time_grid)
gen min2  = mm(exec_time_grid)
gen time_grid= hms(hour2,min2,00)  
format time_grid %tcHH:MM

gen time_grid_num = time_grid 
label define time_grid_lab 0 "00:00" 3600000 "01:00" 7200000 "02:00"	 ///
					   10800000 "03:00" 14400000 "04:00" 18000000 "05:00" ///
					   21600000 "06:00" 25200000 "07:00" 28800000 "08:00" ///
					   32400000 "09:00" 36000000 "10:00" 39600000 "11:00" ///
					   43200000 "12:00" 46800000 "13:00" 50400000 "14:00" ///
					   54000000 "15:00" 57600000 "16:00" 61200000 "17:00" ///
					   64800000 "18:00" 68400000 "19:00" 72000000 "20:00" ///
					   75600000 "21:00" 79200000 "22:00" 82800000 "23:00"
					   
label values time_grid time_grid_lab 					   
					   
*year, month and week
drop year 
gen year = year(date)
gen month = month(date)

*indicator for time change 
gen newtime     = (date>=td(20march2017)) 				 // day of treatment 
replace newtime = 0 if (date==td(20march2017) & (time_grid<tc(04:00))) // time of treatment

*week: (note that week(date) is not what I want. It says week 2 on Friday already)	 
tempfile tempbase
save  `tempbase' , replace
collapse (max) newtime, by(date year)
gen dayofweek = dow(date)
gen a1 = (dayofweek==1)
bys year: gen week = sum(a1) 
drop a1

bys newtime (date): gen a0=_n
gen dateop = - date 
bys newtime (dateop): gen a1=_n 
drop dateop

gen time_dum = a0-1 		if newtime==1
replace time_dum = - a1 	if newtime==0


label var time_dum "0 at policy change, -1 on the trading day before, -2 the one before, 1 the trading day after, 2 the one after ..."
keep week date  time_dum

tempfile tempadd
save  `tempadd' , replace

use `tempbase', clear 
merge m:1 date using   `tempadd'
drop _merge


*dummy for whether CanDeal is open
gen before_time_change = (date<td(20march2017)) // dummy for a time before March 20 2017
gen open_can = 0 if time_grid!=.
replace open_can  = 1 if (time_grid>=tc(07:00) & time_grid<=tc(18:00)   & before_time_change==1  )
replace open_can  = 1 if (time_grid>=tc(04:00) & time_grid<=tc(18:00) &  before_time_change==0)
replace open_can =. if exec_time==.

*dummy for regular business hours 
gen business_hours = 0 if time_grid!=.
replace business_hours  = 1 if (time_grid>=tc(07:00) & time_grid<=tc(17:00))
replace business_hours =. if exec_time==.


************************************************************
*** NORMALIZE YIELDS 
************************************************************

drop quantity
replace IIROC_QUANTITY=IIROC_QUANTITY/100 
gen quant1 = IIROC_QUANTITY
gen quant2 = IIROC_QUANTITY*IIROC_QUANTITY
gen quant3 = IIROC_QUANTITY*IIROC_QUANTITY*IIROC_QUANTITY
local todrop quant*

gen side = (IIROC_SIDE=="SELL")

*show prices don't matter
eststo: reghdfe IIROC_YIELD midyield_bl side if trackclient!="no", absorb(exec_time_grid isin_id#week#year lei_id) vce(cluster lei_id)
esttab using  "`dropbox_tab'/reg_prices_dont_matter.tex", wide title("") se ar2 nobaselevels label mtitle("")  nonumbers   replace 
eststo clear

*normalization regression
reghdfe IIROC_YIELD side, absorb(feh3=exec_time_grid fes3=isin_id#week#year) residuals(resids3)
gen yield     =  _b[_cons]  + _b[side] + resids3 if IIROC_SIDE=="SELL"
replace yield =  _b[_cons]  + resids3 		     if IIROC_SIDE=="BUY"

	*label dealers 
	label define mat_lab 1 "0-1Y" 2 "1-2Y" 3 "2-3Y"	///
						4 "3-4Y" 5 "5-6Y" 6 "6-7Y"	 ///
						7 "7-8Y" 8 "8-9Y" 9 "9-10Y"
					   
	label values mat_type_year mat_lab  

	egen py1 = pctile(IIROC_YIELD), p(1)
	egen py99 = pctile(IIROC_YIELD), p(99)
    egen py12 = pctile(yield), p(1)
	egen py992 = pctile(yield), p(99)


	*Boxplots with raw yields and normalized yields
	graph box yield IIROC_YIELD if mat_type_year<=9 & IIROC_YIELD>=py1 & IIROC_YIELD<=py99 & yield>=py12 & yield<=py992 ///
								, over(mat_type_year)  leg(order(1 2) label(1 "normalized yield") label(2 "raw data") ///
								region(lc(white))) yti(basis points) graphregion(color(white)) box(1, fcolor(black) ///
								lcolor(black)) box(2, fcolor(white) lcolor(black)) note("") marker(1, msize(vsmall) ///
								msymbol(circle_hollow) mcolor(black))

	graph export "`dropbox_fig'/graph_box_norm_yields.png", replace


gen midyield_bl_norm  = midyield_bl  - feh3 - fes3 
gen bidyield_bl_norm  = bidyield_bl  - feh3 - fes3 
gen askyield_bl_norm  = askyield_bl  - feh3 - fes3 
gen bidyield_can_norm = bidyield_can - feh3 - fes3 
gen askyield_can_norm = askyield_can - feh3 - fes3 


*store dealer-investor fixed effects - not used anywhere
reghdfe IIROC_YIELD side , absorb(feij = bidderid#lei_id exec_time_grid isin_id#week#year) 
drop side quant*
replace IIROC_QUANTITY=IIROC_QUANTITY*100


************************************************************
*** EXCLUDE FALSE RETAIL LABELS
************************************************************

*** Counter number of times the client switches
bys LEI (exec_time): gen a_switch= (retailer!=retailer[_n-1]) if LEI!="anonymous"
bys LEI (exec_time): gen a1 = _n  if  LEI!="anonymous"

replace a_switch = 0 if a1==1   & LEI!="anonymous" // this is the first time the LEI shows up. Can't be a switch
label var a_switch "assumes value 1 when the switch takes place (any direction)"
	
sort LEI exec_time
bys LEI: egen N_switches = sum(a_switch) if  LEI!="anonymous"
tab N_switches 
drop a1


*** Determine the direction in which the client switches
bys LEI (exec_time): gen a1 = _n 
bys LEI (exec_time): gen aIR= (retailer!=retailer[_n-1]) & retailer==1  if LEI!="anonymous" // from institutional to retailer
replace aIR = 0 if a1==1
bys LEI: egen N_switches_IR = sum(aIR) if  LEI!="anonymous"

bys LEI (exec_time): gen aRI= (retailer!=retailer[_n-1]) & retailer==0  if LEI!="anonymous" // from retailer to institutional
replace aRI = 0 if a1==1
bys LEI: egen N_switches_RI = sum(aRI) if  LEI!="anonymous"

drop a1


	

************************************************************
*** SHOW SAMPLE RESTRICTIONS (for table in the Appendix)
************************************************************

*all dealer-to-investor trades
count
*exclude the yield outerliers
count if IIROC_YIELD!=.
*exclude in-house trades 
count if IIROC_YIELD!=. & gleif_related==""
*exclude other trading_venues
count if IIROC_YIELD!=. & gleif_related=="" & (trading_venue=="OTC" | trading_venue=="CanDeal")
*exclude out of business hours 
count if IIROC_YIELD!=. & gleif_related=="" & (trading_venue=="OTC" | trading_venue=="CanDeal") & open_can==1
*drop large trades 
count if IIROC_YIELD!=. & gleif_related=="" & (trading_venue=="OTC" | trading_venue=="CanDeal") & open_can==1 & IIROC_QUANTITY<=$size
*exclude multiple switchers 

count if IIROC_YIELD!=. & gleif_related=="" & (trading_venue=="OTC" | trading_venue=="CanDeal") & open_can==1 & ((N_switches==1| N_switches==0) |  trackclient=="no") & IIROC_QUANTITY<=$size 


************************************************************
***  RESTIRCTED SAMPLE  
************************************************************

*drop false reported types 
keep if (N_switches==1| N_switches==0) |  trackclient=="no" // clean out these are wrong

	*save "`local_data'/data_base_bills_clients.dta", replace
	*NOTE: THIS FILE IS NOT USED ANYWHERE - CAN BE DELETED EVENTUALLY

	
*keep only OTC and CanDeal
keep if trading_venue=="OTC" | trading_venue=="CanDeal"


** drop missing prices and too large trades
drop if IIROC_QUANTITY> $size *100


*only clients that we can trace through the market 
if $trackclient ==1{
keep if trackclient!="no"
}

*only business hours 
if $open_can ==1{ 
keep if open_can==1
}


************************************************************
***  SAVE BASE DATA 
************************************************************

save "`local_data'/data_base_bills_clients_matlab_2022b.dta", replace 




































